Bayesian GLMM Part3

Author

Murray Logan

Published

08/07/2025

1 Preparations

Load the necessary libraries

library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(standist) # for exploring distributions
library(HDInterval) # for HPD intervals
library(posterior) # for posterior draws
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for MCMC diagnostics
library(rstan) # for interfacing with STAN
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(DHARMa) # for residual diagnostics
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial plots
library(broom.mixed) # for tidying MCMC outputs
library(patchwork) # for multiple plots
library(ggridges) # for ridge plots
library(bayestestR) # for ROPE
library(see) # for some plots
library(easystats) # framework for stats, modelling and visualisation
library(modelsummary)
source("helperFunctions.R")

2 Scenario

Figure 1: Starlings
Figure 2: Sampling design for the starling data set
Table 1: Format of starling_full.csv data file
SITUATION MONTH MASS BIRD
tree Nov 78 tree1
.. .. .. ..
nest-box Nov 78 nest-box1
.. .. .. ..
inside Nov 79 inside1
.. .. .. ..
other Nov 77 other1
.. .. .. ..
tree Jan 85 tree1
.. .. .. ..
Table 2: Description of the variables in the starling_full data file
SITUATION Categorical listing of roosting situations (tree, nest-box, inside or other)
MONTH Categorical listing of the month of sampling.
MASS Mass (g) of starlings.
BIRD Categorical listing of individual bird repeatedly sampled.

3 Read in the data

starling <- read_csv("../data/starling_full.csv", trim_ws = TRUE)
Rows: 80 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): MONTH, SITUATION, BIRD
dbl (2): subjectnum, MASS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(starling)
Rows: 80
Columns: 5
$ MONTH      <chr> "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "Nov", "No…
$ SITUATION  <chr> "tree", "tree", "tree", "tree", "tree", "tree", "tree", "tr…
$ subjectnum <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1…
$ BIRD       <chr> "tree1", "tree2", "tree3", "tree4", "tree5", "tree6", "tree…
$ MASS       <dbl> 78, 88, 87, 88, 83, 82, 81, 80, 80, 89, 78, 78, 85, 81, 78,…
## Explore the first 6 rows of the data
head(starling)
# A tibble: 6 × 5
  MONTH SITUATION subjectnum BIRD   MASS
  <chr> <chr>          <dbl> <chr> <dbl>
1 Nov   tree               1 tree1    78
2 Nov   tree               2 tree2    88
3 Nov   tree               3 tree3    87
4 Nov   tree               4 tree4    88
5 Nov   tree               5 tree5    83
6 Nov   tree               6 tree6    82
str(starling)
spc_tbl_ [80 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ MONTH     : chr [1:80] "Nov" "Nov" "Nov" "Nov" ...
 $ SITUATION : chr [1:80] "tree" "tree" "tree" "tree" ...
 $ subjectnum: num [1:80] 1 2 3 4 5 6 7 8 9 10 ...
 $ BIRD      : chr [1:80] "tree1" "tree2" "tree3" "tree4" ...
 $ MASS      : num [1:80] 78 88 87 88 83 82 81 80 80 89 ...
 - attr(*, "spec")=
  .. cols(
  ..   MONTH = col_character(),
  ..   SITUATION = col_character(),
  ..   subjectnum = col_double(),
  ..   BIRD = col_character(),
  ..   MASS = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
starling |> datawizard::data_codebook()
starling (80 rows and 5 variables, 5 shown)

ID | Name       | Type      | Missings |    Values |          N
---+------------+-----------+----------+-----------+-----------
1  | MONTH      | character | 0 (0.0%) |       Jan | 40 (50.0%)
   |            |           |          |       Nov | 40 (50.0%)
---+------------+-----------+----------+-----------+-----------
2  | SITUATION  | character | 0 (0.0%) |    inside | 20 (25.0%)
   |            |           |          |  nest-box | 20 (25.0%)
   |            |           |          |     other | 20 (25.0%)
   |            |           |          |      tree | 20 (25.0%)
---+------------+-----------+----------+-----------+-----------
3  | subjectnum | numeric   | 0 (0.0%) |   [1, 10] |         80
---+------------+-----------+----------+-----------+-----------
4  | BIRD       | character | 0 (0.0%) |   inside1 |  2 ( 2.5%)
   |            |           |          |  inside10 |  2 ( 2.5%)
   |            |           |          |   inside2 |  2 ( 2.5%)
   |            |           |          |   inside3 |  2 ( 2.5%)
   |            |           |          |   inside4 |  2 ( 2.5%)
   |            |           |          |   inside5 |  2 ( 2.5%)
   |            |           |          |   inside6 |  2 ( 2.5%)
   |            |           |          |   inside7 |  2 ( 2.5%)
   |            |           |          |   inside8 |  2 ( 2.5%)
   |            |           |          |   inside9 |  2 ( 2.5%)
   |            |           |          |     (...) |           
---+------------+-----------+----------+-----------+-----------
5  | MASS       | numeric   | 0 (0.0%) | [68, 100] |         80
---------------------------------------------------------------
starling |> modelsummary::datasummary_skim()
Unique Missing Pct. Mean SD Min Median Max Histogram
subjectnum 10 0 5.5 2.9 1.0 5.5 10.0
MASS 28 0 83.8 6.7 68.0 84.0 100.0
N %
MONTH Jan 40 50.0
Nov 40 50.0
SITUATION inside 20 25.0
nest-box 20 25.0
other 20 25.0
tree 20 25.0
BIRD inside1 2 2.5
inside10 2 2.5
inside2 2 2.5
inside3 2 2.5
inside4 2 2.5
inside5 2 2.5
inside6 2 2.5
inside7 2 2.5
inside8 2 2.5
inside9 2 2.5
nest-box1 2 2.5
nest-box10 2 2.5
nest-box2 2 2.5
nest-box3 2 2.5
nest-box4 2 2.5
nest-box5 2 2.5
nest-box6 2 2.5
nest-box7 2 2.5
nest-box8 2 2.5
nest-box9 2 2.5
other1 2 2.5
other10 2 2.5
other2 2 2.5
other3 2 2.5
other4 2 2.5
other5 2 2.5
other6 2 2.5
other7 2 2.5
other8 2 2.5
other9 2 2.5
tree1 2 2.5
tree10 2 2.5
tree2 2 2.5
tree3 2 2.5
tree4 2 2.5
tree5 2 2.5
tree6 2 2.5
tree7 2 2.5
tree8 2 2.5
tree9 2 2.5
starling |> modelsummary::datasummary_skim(by = c("SITUATION", "MONTH"))
SITUATION MONTH Unique Missing Pct. Mean SD Min Median Max Histogram
subjectnum tree Nov 10 0 5.5 3.0 1.0 5.5 10.0
nest-box Nov 10 0 5.5 3.0 1.0 5.5 10.0
inside Nov 10 0 5.5 3.0 1.0 5.5 10.0
other Nov 10 0 5.5 3.0 1.0 5.5 10.0
tree Jan 10 0 5.5 3.0 1.0 5.5 10.0
nest-box Jan 10 0 5.5 3.0 1.0 5.5 10.0
inside Jan 10 0 5.5 3.0 1.0 5.5 10.0
other Jan 10 0 5.5 3.0 1.0 5.5 10.0
MASS tree Nov 8 0 83.6 4.0 78.0 82.5 89.0
nest-box Nov 6 0 79.4 3.2 74.0 79.5 85.0
inside Nov 8 0 78.6 3.3 73.0 78.5 84.0
other Nov 8 0 75.4 4.5 68.0 75.0 84.0
tree Jan 9 0 90.8 5.5 85.0 88.5 100.0
nest-box Jan 8 0 90.2 4.4 84.0 89.5 96.0
inside Jan 10 0 88.2 4.0 83.0 87.5 96.0
other Jan 9 0 84.2 4.3 77.0 84.5 90.0
N %
MONTH Jan 40 50.0
Nov 40 50.0
SITUATION inside 20 25.0
nest-box 20 25.0
other 20 25.0
tree 20 25.0
BIRD inside1 2 2.5
inside10 2 2.5
inside2 2 2.5
inside3 2 2.5
inside4 2 2.5
inside5 2 2.5
inside6 2 2.5
inside7 2 2.5
inside8 2 2.5
inside9 2 2.5
nest-box1 2 2.5
nest-box10 2 2.5
nest-box2 2 2.5
nest-box3 2 2.5
nest-box4 2 2.5
nest-box5 2 2.5
nest-box6 2 2.5
nest-box7 2 2.5
nest-box8 2 2.5
nest-box9 2 2.5
other1 2 2.5
other10 2 2.5
other2 2 2.5
other3 2 2.5
other4 2 2.5
other5 2 2.5
other6 2 2.5
other7 2 2.5
other8 2 2.5
other9 2 2.5
tree1 2 2.5
tree10 2 2.5
tree2 2 2.5
tree3 2 2.5
tree4 2 2.5
tree5 2 2.5
tree6 2 2.5
tree7 2 2.5
tree8 2 2.5
tree9 2 2.5

4 Exploratory data analysis

Model formula: \[ \begin{align} y_i &\sim{} \mathcal{N}(\mu_i, \sigma^2)\\ \mu_i &= \beta_0 + \boldsymbol{\beta} \bf{X_i} + \boldsymbol{\gamma} \bf{Z_i}\\ \boldsymbol{\gamma} &= \gamma_0\\ \beta_0 &\sim{} \mathcal{N}(0, 100)\\ \beta &\sim{} \mathcal{N}(0, 10)\\ \gamma_0 &\sim{} \mathcal{N}(0, \sigma_1^2)\\ \sigma &\sim{} \mathcal{cauchy}(0, 2)\\ \sigma_1 &\sim{} \mathcal{cauchy}(0, 2)\\ \end{align} \]

where \(\boldsymbol{\beta}\) and \(\boldsymbol{\gamma}\) are vectors of the fixed and random effects parameters respectively and \(\bf{X}\) is the model matrix representing the overall intercept and effects of roosting situation and month on starling mass. \(\bf{Z}\) represents a cell means model matrix for the random intercepts associated with individual birds.

starling <- starling %>%
  mutate(
    BIRD = factor(BIRD),
    SITUATION = factor(SITUATION),
    MONTH = factor(MONTH, levels = c("Nov", "Jan"))
  )
ggplot(starling, aes(y = MASS, x = MONTH)) +
  geom_boxplot() +
  facet_grid(~SITUATION)

ggplot(starling, aes(y = MASS, x = SITUATION, fill = MONTH)) +
  geom_boxplot()

## Better still
ggplot(starling, aes(y = MASS, x = MONTH, group = BIRD)) +
  geom_point() +
  geom_line() +
  facet_grid(~SITUATION)

Conclusions:

  • it is clear that the Nov mass of each bird is different - so random intercepts
  • the degree to which they change between Nov and Dec is also relatively different per bird - perhaps random intercept/random slope

5 Fit the model

In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.

starling.rstanarm <- stan_glmer(MASS ~ MONTH * SITUATION + (1 | BIRD),
  data = starling,
  family = gaussian(),
  iter = 5000,
  warmup = 2000,
  chains = 3,
  thin = 5,
  refresh = 0
)
starling.rstanarm %>% prior_summary()
Priors for model '.' 
------
Intercept (after predictors centered)
  Specified prior:
    ~ normal(location = 84, scale = 2.5)
  Adjusted prior:
    ~ normal(location = 84, scale = 17)

Coefficients
  Specified prior:
    ~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
  Adjusted prior:
    ~ normal(location = [0,0,0,...], scale = [33.25,38.40,38.40,...])

Auxiliary (sigma)
  Specified prior:
    ~ exponential(rate = 1)
  Adjusted prior:
    ~ exponential(rate = 0.15)

Covariance
 ~ decov(reg. = 1, conc. = 1, shape = 1, scale = 1)
------
See help('prior_summary.stanreg') for more details

This tells us: - for the intercept (when the family is Gaussian), a normal prior with a mean of 84 and a standard deviation of 17 is used. The 2.5 is used for scaling all parameter standard deviations. The value of 84 is based on the mean of the response (mean(starling$MASS)) and the scaled standard deviation of 17 is based on multiplying the scaling factor by the standard deviation of the response.

2.5 * sd(starling$MASS)
[1] 16.73225
  • for the coefficients (in this case, the effects of MONTH, SEASON and their interactions), the default prior is a normal prior centred around 0 with a standard deviation of 2.5. This is then adjusted for the scale of the data by multiplying the 2.5 by the ratio of the standard deviation of the response by the standard deviation of the numerical dummy variables for the predictor (then rounded).
2.5 * sd(starling$MASS) / apply(model.matrix(~ MONTH * SITUATION, starling)[, -1], 2, sd)
                  MONTHJan          SITUATIONnest-box 
                  33.25470                   38.39922 
            SITUATIONother              SITUATIONtree 
                  38.39922                   38.39922 
MONTHJan:SITUATIONnest-box    MONTHJan:SITUATIONother 
                  50.27638                   50.27638 
    MONTHJan:SITUATIONtree 
                  50.27638 
  • the auxiliary prior represents whatever additional prior is required for the nominated model. In the case of a Gaussian model, this will be \(\sigma\), for negative binomial, it will be the reciprocal of dispersion, for gamma, it will be shape, etc . By default in rstanarm, this is a exponential with a rate of 1 which is then adjusted by division with the standard deviation of the response.
1 / sd(starling$MASS)
[1] 0.149412

Lets now run with priors only so that we can explore the range of values they allow in the posteriors.

starling.rstanarm1 <- update(starling.rstanarm, prior_PD = TRUE)
starling.rstanarm1 %>%
  ggpredict(~ MONTH * SITUATION) %>%
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Conclusions:

  • we see that the range of predictions is fairly wide and the predicted means could range from a small value to a relatively large value.

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centred at 84 with a standard deviation of 17
    • mean of 84: since mean(starling$MASS)
    • sd of 17: since 2.5*sd(starling$MASS)
  • \(\beta_1\): normal centred at 0 with a standard deviation of 33
    • sd of 33: since 2.5*sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[,-1], 2, sd)
  • \(\beta_{2-4}\): normal centred at 0 with a standard deviation of 39
    • sd of 39: since 2.5*sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[,-1], 2, sd)
  • \(\beta_{5-7}\): normal centred at 0 with a standard deviation of 50
    • sd of 50: since 2.5*sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[,-1], 2, sd)
  • \(\sigma\): exponential with rate of 0.15
    • since 1 / sd(starling$MASS)
  • \(\Sigma\): decov with:
    • regularisation: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

I will also overlay the raw data for comparison.

starling.rstanarm2 <- stan_glmer(MASS ~ MONTH * SITUATION + (1 | BIRD),
  data = starling,
  family = gaussian(),
  prior_intercept = normal(84, 17, autoscale = FALSE),
  prior = normal(0, c(33, 39, 39, 39, 50, 50, 50), autoscale = FALSE),
  prior_aux = rstanarm::exponential(0.15, autoscale = FALSE),
  prior_covariance = decov(1, 1, 1, 1),
  prior_PD = TRUE,
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0
)
starling.rstanarm2 %>%
  ggpredict(~ SITUATION * MONTH) %>%
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Now lets refit, conditioning on the data.

starling.rstanarm3 <- update(starling.rstanarm2, prior_PD = FALSE)
posterior_vs_prior(starling.rstanarm3,
  color_by = "vs", group_by = TRUE,
  facet_args = list(scales = "free_y")
)

Drawing from prior...

Conclusions:

  • in each case, the prior is substantially wider than the posterior, suggesting that the posterior is not biased towards the prior.
ggemmeans(starling.rstanarm3, ~ SITUATION * MONTH) %>% plot(show_data = TRUE)
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

ggpredict(starling.rstanarm3, ~ SITUATION * MONTH) %>% plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over fitting) and help stabilise the computations.

Unlike rstanarm, brms models must be compiled before they start sampling. For most models, the compilation of the stan code takes around 45 seconds.

starling.form <- bf(MASS ~ MONTH*SITUATION+(MONTH + MONTH:SITUATION|BIRD),
  family = gaussian()
)
options(width=150)
starling.form %>% get_prior(data = starling)
                 prior     class                       coef group resp dpar nlpar lb ub       source
                (flat)         b                                                             default
                (flat)         b                   MONTHJan                             (vectorized)
                (flat)         b MONTHJan:SITUATIONnestMbox                             (vectorized)
                (flat)         b    MONTHJan:SITUATIONother                             (vectorized)
                (flat)         b     MONTHJan:SITUATIONtree                             (vectorized)
                (flat)         b          SITUATIONnestMbox                             (vectorized)
                (flat)         b             SITUATIONother                             (vectorized)
                (flat)         b              SITUATIONtree                             (vectorized)
                lkj(1)       cor                                                             default
                lkj(1)       cor                             BIRD                       (vectorized)
 student_t(3, 84, 5.9) Intercept                                                             default
  student_t(3, 0, 5.9)        sd                                                   0         default
  student_t(3, 0, 5.9)        sd                             BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd                  Intercept  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd                   MONTHJan  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd MONTHJan:SITUATIONnestMbox  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd    MONTHJan:SITUATIONother  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd     MONTHJan:SITUATIONtree  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd MONTHNov:SITUATIONnestMbox  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd    MONTHNov:SITUATIONother  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)        sd     MONTHNov:SITUATIONtree  BIRD                  0    (vectorized)
  student_t(3, 0, 5.9)     sigma                                                   0         default
options(width=80)

The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]

When defining our own priors, we typically do not want them to be scaled.

If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):

  • \(\beta_0\): normal centred at 84 with a standard deviation of 5.9
    • mean of 84: since median(starling$MASS)
    • sd of 5.9: since mad(starling$MASS)
  • \(\beta_1\): normal centred at 0 with a standard deviation of 13
    • sd of 13: since sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[, -1], 2, sd)
  • \(\beta_{2-4}\): normal centred at 0 with a standard deviation of 15
    • sd of 15: since sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[, -1], 2, sd)
  • \(\beta_{5-7}\): normal centred at 0 with a standard deviation of 20
    • sd of 20: since sd(starling$MASS) / apply(model.matrix(~MONTH*SITUATION, starling)[, -1], 2, sd)
  • \(\sigma\): gamma with shape parameters 6.5 and 1
    • 6.5: since sd(starling$MASS)
  • \(\sigma_j\): half-cauchy with parameters 0 and 2.5.
    • 2.: since sqrt(sd(starling$MASS))
  • \(\Sigma\): decov with:
    • regularisation: the exponent for a LKJ prior on the correlation matrix. A value of 1 (default) implies a joint uniform prior
    • concentration: the concentration parameter for a symmetric Dirichlet distribution. A value of 1 (default) implies a joint uniform distribution
    • shape and scale: the shape and scale parameters for a gamma prior on the scale and scale parameters of the decov prior. A value of 1 for both (default) simplifies the gamma prior to a unit-exponential distribution.

Note, for hierarchical models, the model will tend to want to have a large \(sigma\) in order to fit the data better. It is a good idea to regularise this tendency by applying a prior that has most mass around zero. Suitable candidates include:

  • half-t: as the degrees of freedom approach infinity, this will approach a half-normal
  • half-cauchy: this is essentially a half-t with 1 degree of freedom
  • exponential

I will also overlay the raw data for comparison.

starling %>%
  group_by(SITUATION, MONTH) %>%
  summarise(
    mean(MASS),
    median(MASS),
    sd(MASS),
    mad(MASS)
  )
`summarise()` has grouped output by 'SITUATION'. You can override using the
`.groups` argument.
# A tibble: 8 × 6
# Groups:   SITUATION [4]
  SITUATION MONTH `mean(MASS)` `median(MASS)` `sd(MASS)` `mad(MASS)`
  <fct>     <fct>        <dbl>          <dbl>      <dbl>       <dbl>
1 inside    Nov           78.6           78.5       3.31        2.22
2 inside    Jan           88.2           87.5       4.05        4.45
3 nest-box  Nov           79.4           79.5       3.20        2.22
4 nest-box  Jan           90.2           89.5       4.37        5.19
5 other     Nov           75.4           75         4.53        2.22
6 other     Jan           84.2           84.5       4.26        4.45
7 tree      Nov           83.6           82.5       4.03        5.19
8 tree      Jan           90.8           88.5       5.47        4.45
sqrt(5.9)
[1] 2.428992
standist::visualize("normal(84,20)", xlim = c(0, 200))

standist::visualize("student_t(3, 0, 5.9)",
  "gamma(6.5,1)",
  "cauchy(0,2.5)",
  "student_t(3, 0, 5.9)",
  xlim = c(-10, 25)
)

priors <- prior(normal(80, 5), class = "Intercept") +
  prior(normal(0, 13), class = "b", coef = "MONTHJan") +
  prior(normal(0, 15), class = "b", coef = "SITUATIONnestMbox") +
  prior(normal(0, 15), class = "b", coef = "SITUATIONother") +
  prior(normal(0, 15), class = "b", coef = "SITUATIONtree") +
  prior(normal(0, 10), class = "b") +
  ## prior(gamma(6.5,1), class = 'sigma') +
  prior(student_t(3, 0, 5), class = "sigma") +
  prior(student_t(3, 0, 5), class = "sd")
starling.form <- bf(MASS ~ MONTH * SITUATION + (1 | BIRD),
  family = gaussian()
)
starling.brm2 <- brm(starling.form,
  data = starling,
  prior = priors,
  sample_prior = "only",
  iter = 5000,
  warmup = 1000,
  chains = 3, cores = 3,
  thin = 5,
  refresh = 0,
  backend = "rstan"
)
Compiling Stan program...
Start sampling
starling.brm2 %>%
  ggpredict(~ SITUATION * MONTH) %>%
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

starling.brm2 %>%
  conditional_effects("SITUATION:MONTH") %>%
  plot(points = TRUE)

The above seem sufficiently wide whilst at the same time not providing any encouragement for the sampler to wander off into very unsupported areas.

starling.brm3 <- update(starling.brm2,
  sample_prior = "yes",
  control = list(adapt_delta = 0.99),
  refresh = 0
)
The desired updates require recompiling the model
Compiling Stan program...
Start sampling
starling.brm3 %>%
  ggpredict(~ SITUATION * MONTH) %>%
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

starling.brm3 %>%
  conditional_effects("SITUATION:MONTH") %>%
  plot(points = TRUE)

starling.brm3 %>% get_variables()
 [1] "b_Intercept"                        "b_MONTHJan"                        
 [3] "b_SITUATIONnestMbox"                "b_SITUATIONother"                  
 [5] "b_SITUATIONtree"                    "b_MONTHJan:SITUATIONnestMbox"      
 [7] "b_MONTHJan:SITUATIONother"          "b_MONTHJan:SITUATIONtree"          
 [9] "sd_BIRD__Intercept"                 "sigma"                             
[11] "Intercept"                          "r_BIRD[inside1,Intercept]"         
[13] "r_BIRD[inside10,Intercept]"         "r_BIRD[inside2,Intercept]"         
[15] "r_BIRD[inside3,Intercept]"          "r_BIRD[inside4,Intercept]"         
[17] "r_BIRD[inside5,Intercept]"          "r_BIRD[inside6,Intercept]"         
[19] "r_BIRD[inside7,Intercept]"          "r_BIRD[inside8,Intercept]"         
[21] "r_BIRD[inside9,Intercept]"          "r_BIRD[nest-box1,Intercept]"       
[23] "r_BIRD[nest-box10,Intercept]"       "r_BIRD[nest-box2,Intercept]"       
[25] "r_BIRD[nest-box3,Intercept]"        "r_BIRD[nest-box4,Intercept]"       
[27] "r_BIRD[nest-box5,Intercept]"        "r_BIRD[nest-box6,Intercept]"       
[29] "r_BIRD[nest-box7,Intercept]"        "r_BIRD[nest-box8,Intercept]"       
[31] "r_BIRD[nest-box9,Intercept]"        "r_BIRD[other1,Intercept]"          
[33] "r_BIRD[other10,Intercept]"          "r_BIRD[other2,Intercept]"          
[35] "r_BIRD[other3,Intercept]"           "r_BIRD[other4,Intercept]"          
[37] "r_BIRD[other5,Intercept]"           "r_BIRD[other6,Intercept]"          
[39] "r_BIRD[other7,Intercept]"           "r_BIRD[other8,Intercept]"          
[41] "r_BIRD[other9,Intercept]"           "r_BIRD[tree1,Intercept]"           
[43] "r_BIRD[tree10,Intercept]"           "r_BIRD[tree2,Intercept]"           
[45] "r_BIRD[tree3,Intercept]"            "r_BIRD[tree4,Intercept]"           
[47] "r_BIRD[tree5,Intercept]"            "r_BIRD[tree6,Intercept]"           
[49] "r_BIRD[tree7,Intercept]"            "r_BIRD[tree8,Intercept]"           
[51] "r_BIRD[tree9,Intercept]"            "prior_Intercept"                   
[53] "prior_b_MONTHJan"                   "prior_b_SITUATIONnestMbox"         
[55] "prior_b_SITUATIONother"             "prior_b_SITUATIONtree"             
[57] "prior_b_MONTHJan:SITUATIONnestMbox" "prior_b_MONTHJan:SITUATIONother"   
[59] "prior_b_MONTHJan:SITUATIONtree"     "prior_sigma"                       
[61] "prior_sd_BIRD"                      "lprior"                            
[63] "lp__"                               "accept_stat__"                     
[65] "stepsize__"                         "treedepth__"                       
[67] "n_leapfrog__"                       "divergent__"                       
[69] "energy__"                          
starling.brm3 %>%
  hypothesis("MONTHJan=0") %>%
  plot()

starling.brm3 %>%
  hypothesis("SITUATIONnestMbox=0") %>%
  plot()

starling.brm3 %>% SUYR_prior_and_posterior()

While we are here, we might like to explore a random intercept/slope model

priors <- prior(normal(80, 2), class = "Intercept") +
  # prior(normal(0, 13), class = 'b', coef = "MONTHJan") +
  # prior(normal(0, 15), class = 'b', coef = "SITUATIONnestMbox") +
  # prior(normal(0, 15), class = 'b', coef = "SITUATIONother") +
  # prior(normal(0, 15), class = 'b', coef = "SITUATIONtree") +
  prior(normal(0, 10), class = "b") +
  ## prior(gamma(6.5,1), class = 'sigma') +
  prior(student_t(3, 0, 1), class = "sigma") +
  ## prior(cauchy(0,2.5), class = 'sd') +
  prior(student_t(3, 0, 1), class = "sd") +
  prior(lkj_corr_cholesky(1), class = "cor")

starling.form <- bf(MASS ~ MONTH * SITUATION + (MONTH | BIRD),
  family = gaussian()
)
starling.brm3a <- brm(starling.form,
  data = starling,
  prior = priors,
  sample_prior = "only",
  iter = 5000,
  warmup = 2500,
  chains = 3, cores = 3,
  thin = 5,
  refresh = 0,
  control = list(adapt_delta = 0.99),
  backend = "rstan"
)
Compiling Stan program...
Start sampling
starling.brm4 <- brm(starling.form,
  data = starling,
  prior = priors,
  sample_prior = "yes",
  iter = 5000,
  warmup = 1000,
  chains = 3,
  thin = 5,
  refresh = 0,
  control = list(adapt_delta = 0.99)
)
Compiling Stan program...
Start sampling
starling.brm4 %>% get_variables()
  [1] "b_Intercept"                   "b_MONTHJan"                   
  [3] "b_SITUATIONnestMbox"           "b_SITUATIONother"             
  [5] "b_SITUATIONtree"               "b_MONTHJan:SITUATIONnestMbox" 
  [7] "b_MONTHJan:SITUATIONother"     "b_MONTHJan:SITUATIONtree"     
  [9] "sd_BIRD__Intercept"            "sd_BIRD__MONTHJan"            
 [11] "cor_BIRD__Intercept__MONTHJan" "sigma"                        
 [13] "Intercept"                     "r_BIRD[inside1,Intercept]"    
 [15] "r_BIRD[inside10,Intercept]"    "r_BIRD[inside2,Intercept]"    
 [17] "r_BIRD[inside3,Intercept]"     "r_BIRD[inside4,Intercept]"    
 [19] "r_BIRD[inside5,Intercept]"     "r_BIRD[inside6,Intercept]"    
 [21] "r_BIRD[inside7,Intercept]"     "r_BIRD[inside8,Intercept]"    
 [23] "r_BIRD[inside9,Intercept]"     "r_BIRD[nest-box1,Intercept]"  
 [25] "r_BIRD[nest-box10,Intercept]"  "r_BIRD[nest-box2,Intercept]"  
 [27] "r_BIRD[nest-box3,Intercept]"   "r_BIRD[nest-box4,Intercept]"  
 [29] "r_BIRD[nest-box5,Intercept]"   "r_BIRD[nest-box6,Intercept]"  
 [31] "r_BIRD[nest-box7,Intercept]"   "r_BIRD[nest-box8,Intercept]"  
 [33] "r_BIRD[nest-box9,Intercept]"   "r_BIRD[other1,Intercept]"     
 [35] "r_BIRD[other10,Intercept]"     "r_BIRD[other2,Intercept]"     
 [37] "r_BIRD[other3,Intercept]"      "r_BIRD[other4,Intercept]"     
 [39] "r_BIRD[other5,Intercept]"      "r_BIRD[other6,Intercept]"     
 [41] "r_BIRD[other7,Intercept]"      "r_BIRD[other8,Intercept]"     
 [43] "r_BIRD[other9,Intercept]"      "r_BIRD[tree1,Intercept]"      
 [45] "r_BIRD[tree10,Intercept]"      "r_BIRD[tree2,Intercept]"      
 [47] "r_BIRD[tree3,Intercept]"       "r_BIRD[tree4,Intercept]"      
 [49] "r_BIRD[tree5,Intercept]"       "r_BIRD[tree6,Intercept]"      
 [51] "r_BIRD[tree7,Intercept]"       "r_BIRD[tree8,Intercept]"      
 [53] "r_BIRD[tree9,Intercept]"       "r_BIRD[inside1,MONTHJan]"     
 [55] "r_BIRD[inside10,MONTHJan]"     "r_BIRD[inside2,MONTHJan]"     
 [57] "r_BIRD[inside3,MONTHJan]"      "r_BIRD[inside4,MONTHJan]"     
 [59] "r_BIRD[inside5,MONTHJan]"      "r_BIRD[inside6,MONTHJan]"     
 [61] "r_BIRD[inside7,MONTHJan]"      "r_BIRD[inside8,MONTHJan]"     
 [63] "r_BIRD[inside9,MONTHJan]"      "r_BIRD[nest-box1,MONTHJan]"   
 [65] "r_BIRD[nest-box10,MONTHJan]"   "r_BIRD[nest-box2,MONTHJan]"   
 [67] "r_BIRD[nest-box3,MONTHJan]"    "r_BIRD[nest-box4,MONTHJan]"   
 [69] "r_BIRD[nest-box5,MONTHJan]"    "r_BIRD[nest-box6,MONTHJan]"   
 [71] "r_BIRD[nest-box7,MONTHJan]"    "r_BIRD[nest-box8,MONTHJan]"   
 [73] "r_BIRD[nest-box9,MONTHJan]"    "r_BIRD[other1,MONTHJan]"      
 [75] "r_BIRD[other10,MONTHJan]"      "r_BIRD[other2,MONTHJan]"      
 [77] "r_BIRD[other3,MONTHJan]"       "r_BIRD[other4,MONTHJan]"      
 [79] "r_BIRD[other5,MONTHJan]"       "r_BIRD[other6,MONTHJan]"      
 [81] "r_BIRD[other7,MONTHJan]"       "r_BIRD[other8,MONTHJan]"      
 [83] "r_BIRD[other9,MONTHJan]"       "r_BIRD[tree1,MONTHJan]"       
 [85] "r_BIRD[tree10,MONTHJan]"       "r_BIRD[tree2,MONTHJan]"       
 [87] "r_BIRD[tree3,MONTHJan]"        "r_BIRD[tree4,MONTHJan]"       
 [89] "r_BIRD[tree5,MONTHJan]"        "r_BIRD[tree6,MONTHJan]"       
 [91] "r_BIRD[tree7,MONTHJan]"        "r_BIRD[tree8,MONTHJan]"       
 [93] "r_BIRD[tree9,MONTHJan]"        "prior_Intercept"              
 [95] "prior_b"                       "prior_sigma"                  
 [97] "prior_sd_BIRD"                 "prior_cor_BIRD"               
 [99] "lprior"                        "lp__"                         
[101] "accept_stat__"                 "stepsize__"                   
[103] "treedepth__"                   "n_leapfrog__"                 
[105] "divergent__"                   "energy__"                     
starling.brm4 |>
  hypothesis("MONTHJan=0") |>
  plot()

starling.brm4 |>
  hypothesis("SITUATIONnestMbox=0") |>
  plot()

starling.brm4 %>% SUYR_prior_and_posterior()

starling.brm4 %>%
  posterior_samples() %>%
  dplyr::select(-`lp__`) %>%
  pivot_longer(everything(), names_to = "key") %>%
  filter(!str_detect(key, "^r")) %>%
  mutate(
    Type = ifelse(str_detect(key, "prior"), "Prior", "Posterior"),
    ## Class = ifelse(str_detect(key, 'Intercept'),  'Intercept',
    ##         ifelse(str_detect(key, 'b'),  'b', 'sigma')),
    Class = case_when(
      str_detect(key, "(^b|^prior).*Intercept$") ~ "Intercept",
      str_detect(key, "b_SITUATION.*|prior_b_SITUATION.*") &
        !str_detect(key, ".*:.*") ~ "SITUATION",
      str_detect(key, "b_MONTH.*|prior_b_MONTH.*") &
        !str_detect(key, ".*\\:.*") ~ "MONTH",
      str_detect(key, ".*\\:.*|prior_b_.*\\:.*") ~ "Interaction",
      str_detect(key, "sd") ~ "sd",
      str_detect(key, "^cor|prior_cor") ~ "cor",
      str_detect(key, "sigma") ~ "sigma"
    ),
    Par = str_replace(key, "b_", "")
  ) %>%
  ggplot(aes(x = Type, y = value, color = Par)) +
  stat_pointinterval(position = position_dodge()) +
  facet_wrap(~Class, scales = "free")
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.

We can compare the two models using LOO

(l.1 <- starling.brm3 %>% rstan::loo())
Warning: Found 1 observations with a pareto_k > 0.7 in model '.'. We recommend
to set 'moment_match = TRUE' in order to perform moment matching for
problematic observations.

Computed from 2400 by 80 log-likelihood matrix.

         Estimate   SE
elpd_loo   -234.0  5.1
p_loo        13.6  1.6
looic       468.0 10.2
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).

Pareto k diagnostic values:
                         Count Pct.    Min. ESS
(-Inf, 0.7]   (good)     79    98.8%   383     
   (0.7, 1]   (bad)       1     1.2%   <NA>    
   (1, Inf)   (very bad)  0     0.0%   <NA>    
See help('pareto-k-diagnostic') for details.
(l.2 <- starling.brm4 %>% rstan::loo())

Computed from 2400 by 80 log-likelihood matrix.

         Estimate   SE
elpd_loo   -233.1  5.4
p_loo        15.8  1.9
looic       466.1 10.8
------
MCSE of elpd_loo is 0.2.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.0]).

All Pareto k estimates are good (k < 0.7).
See help('pareto-k-diagnostic') for details.
loo_compare(l.1, l.2)
  elpd_diff se_diff
.  0.0       0.0   
. -0.9       0.7   

There is not substantially more support for the more complex random intercept/slope model over the simpler random intercept only model. Consequently, we will continue with the simple random intercept model.

6 MCMC sampling diagnostics

The bayesplot package offers a range of MCMC diagnostics as well as Posterior Probability Checks (PPC), all of which have a convenient plot() interface. Lets start with the MCMC diagnostics.

See list of available diagnostics by name
available_mcmc()
bayesplot MCMC module:
  mcmc_acf
  mcmc_acf_bar
  mcmc_areas
  mcmc_areas_ridges
  mcmc_combo
  mcmc_dens
  mcmc_dens_chains
  mcmc_dens_overlay
  mcmc_hex
  mcmc_hist
  mcmc_hist_by_chain
  mcmc_intervals
  mcmc_neff
  mcmc_neff_hist
  mcmc_nuts_acceptance
  mcmc_nuts_divergence
  mcmc_nuts_energy
  mcmc_nuts_stepsize
  mcmc_nuts_treedepth
  mcmc_pairs
  mcmc_parcoord
  mcmc_rank_ecdf
  mcmc_rank_hist
  mcmc_rank_overlay
  mcmc_recover_hist
  mcmc_recover_intervals
  mcmc_recover_scatter
  mcmc_rhat
  mcmc_rhat_hist
  mcmc_scatter
  mcmc_trace
  mcmc_trace_highlight
  mcmc_violin

Of these, we will focus on:

  • trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
pars <- starling.brm3 %>% get_variables()
pars <- pars %>%
  str_extract("^b_.*|[sS]igma|^sd.*") %>%
  ## str_extract('^b.Intercept|^b_SITUTATION.*|^b_MONTH.*|[sS]igma|^sd.*') %>%
  na.omit()
pars
 [1] "b_Intercept"                  "b_MONTHJan"                  
 [3] "b_SITUATIONnestMbox"          "b_SITUATIONother"            
 [5] "b_SITUATIONtree"              "b_MONTHJan:SITUATIONnestMbox"
 [7] "b_MONTHJan:SITUATIONother"    "b_MONTHJan:SITUATIONtree"    
 [9] "sd_BIRD__Intercept"           "sigma"                       
[11] "sigma"                       
attr(,"na.action")
 [1] 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
[26] 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 61
[51] 62 63 64 65 66 67 68 69
attr(,"class")
[1] "omit"
starling.brm3 %>% mcmc_plot(type = "trace", variable = pars)
No divergences to plot.

The chains appear well mixed and very similar

  • acf_bar (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
starling.brm3 %>% mcmc_plot(type = "acf_bar", variable = pars)

There is no evidence of auto-correlation in the MCMC samples

  • rhat_hist: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
starling.brm3 %>% mcmc_plot(type = "rhat_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • neff_hist (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

starling.brm2 %>% mcmc_plot(type = "neff_hist")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

More diagnostics
starling.brm3 %>% mcmc_plot(type = "combo", variable = pars)

starling.brm3 %>% mcmc_plot(type = "violin", variable = pars)

The rstan package offers a range of MCMC diagnostics. Lets start with the MCMC diagnostics.

Of these, we will focus on:

  • stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
starling.brm3 %>% get_variables()
 [1] "b_Intercept"                        "b_MONTHJan"                        
 [3] "b_SITUATIONnestMbox"                "b_SITUATIONother"                  
 [5] "b_SITUATIONtree"                    "b_MONTHJan:SITUATIONnestMbox"      
 [7] "b_MONTHJan:SITUATIONother"          "b_MONTHJan:SITUATIONtree"          
 [9] "sd_BIRD__Intercept"                 "sigma"                             
[11] "Intercept"                          "r_BIRD[inside1,Intercept]"         
[13] "r_BIRD[inside10,Intercept]"         "r_BIRD[inside2,Intercept]"         
[15] "r_BIRD[inside3,Intercept]"          "r_BIRD[inside4,Intercept]"         
[17] "r_BIRD[inside5,Intercept]"          "r_BIRD[inside6,Intercept]"         
[19] "r_BIRD[inside7,Intercept]"          "r_BIRD[inside8,Intercept]"         
[21] "r_BIRD[inside9,Intercept]"          "r_BIRD[nest-box1,Intercept]"       
[23] "r_BIRD[nest-box10,Intercept]"       "r_BIRD[nest-box2,Intercept]"       
[25] "r_BIRD[nest-box3,Intercept]"        "r_BIRD[nest-box4,Intercept]"       
[27] "r_BIRD[nest-box5,Intercept]"        "r_BIRD[nest-box6,Intercept]"       
[29] "r_BIRD[nest-box7,Intercept]"        "r_BIRD[nest-box8,Intercept]"       
[31] "r_BIRD[nest-box9,Intercept]"        "r_BIRD[other1,Intercept]"          
[33] "r_BIRD[other10,Intercept]"          "r_BIRD[other2,Intercept]"          
[35] "r_BIRD[other3,Intercept]"           "r_BIRD[other4,Intercept]"          
[37] "r_BIRD[other5,Intercept]"           "r_BIRD[other6,Intercept]"          
[39] "r_BIRD[other7,Intercept]"           "r_BIRD[other8,Intercept]"          
[41] "r_BIRD[other9,Intercept]"           "r_BIRD[tree1,Intercept]"           
[43] "r_BIRD[tree10,Intercept]"           "r_BIRD[tree2,Intercept]"           
[45] "r_BIRD[tree3,Intercept]"            "r_BIRD[tree4,Intercept]"           
[47] "r_BIRD[tree5,Intercept]"            "r_BIRD[tree6,Intercept]"           
[49] "r_BIRD[tree7,Intercept]"            "r_BIRD[tree8,Intercept]"           
[51] "r_BIRD[tree9,Intercept]"            "prior_Intercept"                   
[53] "prior_b_MONTHJan"                   "prior_b_SITUATIONnestMbox"         
[55] "prior_b_SITUATIONother"             "prior_b_SITUATIONtree"             
[57] "prior_b_MONTHJan:SITUATIONnestMbox" "prior_b_MONTHJan:SITUATIONother"   
[59] "prior_b_MONTHJan:SITUATIONtree"     "prior_sigma"                       
[61] "prior_sd_BIRD"                      "lprior"                            
[63] "lp__"                               "accept_stat__"                     
[65] "stepsize__"                         "treedepth__"                       
[67] "n_leapfrog__"                       "divergent__"                       
[69] "energy__"                          
pars <- starling.brm4 |> get_variables()
pars <- str_extract(pars, "^b_.*|^sigma$|^sd.*") %>% na.omit()

starling.brm4$fit |>
  stan_trace(pars = pars)

The chains appear well mixed and very similar

  • stan_acf (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
starling.brm4$fit |>
  stan_ac(pars = pars)

There is no evidence of auto-correlation in the MCMC samples

  • stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
starling.brm4$fit |> stan_rhat()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

All Rhat values are below 1.05, suggesting the chains have converged.

  • stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

starling.brm4$fit %>% stan_ess()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ratios all very high.

starling.brm4$fit %>%
  stan_dens(separate_chains = TRUE, pars = pars)

The ggmean package also as a set of MCMC diagnostic functions. Lets start with the MCMC diagnostics.

Of these, we will focus on:

  • ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
## starling.ggs <- starling.brm3 %>% ggs(burnin = FALSE, inc_warmup = FALSE)
## starling.ggs %>% ggs_traceplot()

The chains appear well mixed and very similar

  • gss_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
## ggs_autocorrelation(starling.ggs)

There is no evidence of auto-correlation in the MCMC samples

  • stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
## ggs_Rhat(starling.ggs)

All Rhat values are below 1.05, suggesting the chains have converged.

  • stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).

    If the ratios are low, tightening the priors may help.

## ggs_effective(starling.ggs)

Ratios all very high.

More diagnostics
## ggs_crosscorrelation(starling.ggs)
## ggs_grb(starling.ggs)

7 Model validation

Post predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.

See list of available diagnostics by name
available_ppc()
bayesplot PPC module:
  ppc_bars
  ppc_bars_grouped
  ppc_boxplot
  ppc_dens
  ppc_dens_overlay
  ppc_dens_overlay_grouped
  ppc_ecdf_overlay
  ppc_ecdf_overlay_grouped
  ppc_error_binned
  ppc_error_hist
  ppc_error_hist_grouped
  ppc_error_scatter
  ppc_error_scatter_avg
  ppc_error_scatter_avg_grouped
  ppc_error_scatter_avg_vs_x
  ppc_freqpoly
  ppc_freqpoly_grouped
  ppc_hist
  ppc_intervals
  ppc_intervals_grouped
  ppc_km_overlay
  ppc_km_overlay_grouped
  ppc_loo_intervals
  ppc_loo_pit_ecdf
  ppc_loo_pit_overlay
  ppc_loo_pit_qq
  ppc_loo_ribbon
  ppc_pit_ecdf
  ppc_pit_ecdf_grouped
  ppc_ribbon
  ppc_ribbon_grouped
  ppc_rootogram
  ppc_scatter
  ppc_scatter_avg
  ppc_scatter_avg_grouped
  ppc_stat
  ppc_stat_2d
  ppc_stat_freqpoly
  ppc_stat_freqpoly_grouped
  ppc_stat_grouped
  ppc_violin_grouped
  • dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
starling.brm3 |> pp_check(type = "dens_overlay", nsamples = 100)
Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
instead.

The model draws appear to be consistent with the observed data.

  • error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. Note, this is not really that useful for models that involve a binomial response
starling.brm3 %>% pp_check(type = "error_scatter_avg")
Using all posterior draws for ppc type 'error_scatter_avg' by default.

This is not really interpretable

  • intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
starling.brm3 %>% pp_check(group = "BIRD", type = "intervals")
Using all posterior draws for ppc type 'intervals' by default.
Warning: The following arguments were unrecognized and ignored: group

The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.

# library(shinystan)
# launch_shinystan(starling.brm2)

DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components yourself, we can still obtain the simulated residuals from the fitted stan model.

We need to supply:

  • simulated (predicted) responses associated with each observation.
  • observed values
  • fitted (predicted) responses (averaged) associated with each observation
preds <- starling.brm3 %>% posterior_predict(ndraws = 250, summary = FALSE)
starling.resids <- createDHARMa(
  simulatedResponse = t(preds),
  observedResponse = starling$MASS,
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = FALSE
)
plot(starling.resids, quantreg = TRUE)

Conclusions:

  • the simulated residuals do not suggest any issues with the residuals
  • there is no evidence of a lack of fit.
starling.resids <- make_brms_dharma_res(starling.brm3, integerResponse = FALSE)
wrap_elements(~ testUniformity(starling.resids)) +
  wrap_elements(~ plotResiduals(starling.resids, form = factor(rep(1, nrow(starling))))) +
  wrap_elements(~ plotResiduals(starling.resids, quantreg = TRUE)) +
  wrap_elements(~ testDispersion(starling.resids))

8 Partial effects plots

starling.brm4 |>
  conditional_effects("SITUATION:MONTH") |>
  plot(points = TRUE)

starling.brm3 %>%
  ggpredict(~ SITUATION * MONTH) %>%
  plot(show_data = TRUE)
Note: uncertainty of error terms are not taken into account. Consider
  setting `interval` to "prediction". This will call `posterior_predict()`
  instead of `posterior_epred()`.
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

starling.brm3 %>%
  ggemmeans(~ SITUATION * MONTH) %>%
  plot(show_data = TRUE)
Data points may overlap. Use the `jitter` argument to add some amount of
  random variation to the location of data points and avoid overplotting.

Partial.obs <- starling.brm3$data %>%
  mutate(
    Pred = predict(starling.brm3)[, "Estimate"],
    Resid = resid(starling.brm3)[, "Estimate"],
    Obs = Pred + Resid
  )

starling.brm3 %>%
  fitted_draws(newdata = starling, re_formula = NA) %>%
  median_hdci() %>%
  ggplot(aes(x = SITUATION, y = .value, color = MONTH)) +
  geom_pointrange(aes(ymin = .lower, ymax = .upper)) +
  geom_line() +
  geom_point(
    data = Partial.obs, aes(y = Obs, x = SITUATION, color = MONTH),
    position = position_nudge(x = 0.1)
  ) +
  geom_point(
    data = starling, aes(y = MASS, x = SITUATION, color = MONTH), alpha = 0.2,
    position = position_nudge(x = 0.05)
  )
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
  means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
  named `object` and the `n` parameter is now named `ndraws`.

9 Model investigation

The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).

starling.brm3 %>% summary()
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: MASS ~ MONTH * SITUATION + (1 | BIRD) 
   Data: starling (Number of observations: 80) 
  Draws: 3 chains, each with iter = 5000; warmup = 1000; thin = 5;
         total post-warmup draws = 2400

Multilevel Hyperparameters:
~BIRD (Number of levels: 40) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     1.12      0.72     0.05     2.66 1.00     1842     2213

Regression Coefficients:
                           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept                     78.68      1.29    76.10    81.18 1.00     2470
MONTHJan                       9.32      1.70     5.99    12.66 1.00     2396
SITUATIONnestMbox              0.73      1.89    -2.82     4.62 1.00     2183
SITUATIONother                -3.30      1.87    -6.88     0.50 1.00     2473
SITUATIONtree                  4.80      1.87     0.97     8.41 1.00     2281
MONTHJan:SITUATIONnestMbox     1.40      2.49    -3.66     6.15 1.00     2342
MONTHJan:SITUATIONother       -0.51      2.49    -5.47     4.34 1.00     2394
MONTHJan:SITUATIONtree        -2.05      2.40    -6.89     2.56 1.00     2288
                           Tail_ESS
Intercept                      2392
MONTHJan                       2326
SITUATIONnestMbox              2334
SITUATIONother                 2251
SITUATIONtree                  2368
MONTHJan:SITUATIONnestMbox     2203
MONTHJan:SITUATIONother        2098
MONTHJan:SITUATIONtree         2267

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     4.10      0.38     3.40     4.95 1.00     2090     2193

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Conclusions:

  • the estimated Mass of starlings nesting Inside in November is 78.68
  • starlings that nest Inside were found to increase their Mass from November to January by 9.32 grams.
  • starling that nest in nest boxes are on average 0.73 grams heavier in November than those that nest inside.
  • starling that nest in other and trees are on average 3.3 grams lighter and 4.8 heavier respectively in November than those that nest inside.
  • there is relatively little variation in Mass between individual birds compared to the variation in Mass within Birds.
# A draws_df: 800 iterations, 3 chains, and 63 variables
   b_Intercept b_MONTHJan b_SITUATIONnestMbox b_SITUATIONother b_SITUATIONtree
1           80        8.5               0.154             -3.8             2.7
2           77       10.0               1.824             -2.6             7.0
3           78        7.9              -0.664             -4.8             7.8
4           79        8.8              -1.152             -5.2             6.0
5           78       10.4               0.070             -4.9             5.9
6           78       11.4               2.850             -2.2             7.3
7           81        6.4              -1.980             -3.4             3.4
8           77       11.3               2.359             -1.2             7.2
9           79       11.4               0.057             -2.2             3.6
10          79        8.5               2.368             -5.0             5.7
   b_MONTHJan:SITUATIONnestMbox b_MONTHJan:SITUATIONother
1                          2.06                      0.28
2                         -1.61                     -0.20
3                          1.77                      1.02
4                          4.54                      1.91
5                          2.71                      2.49
6                          0.92                     -0.26
7                          6.29                      2.23
8                         -1.02                     -3.31
9                          1.90                     -3.65
10                        -0.97                      0.74
   b_MONTHJan:SITUATIONtree
1                      2.83
2                     -0.72
3                     -1.61
4                     -3.51
5                     -3.87
6                     -4.40
7                      0.35
8                     -3.81
9                     -2.39
10                    -2.86
# ... with 2390 more draws, and 55 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
# A tibble: 63 × 8
   variable                 median    lower  upper      Pl     Pg  rhat ess_bulk
   <chr>                     <dbl>    <dbl>  <dbl>   <dbl>  <dbl> <dbl>    <dbl>
 1 b_Intercept              78.7   76.3     81.3   0       1      1.000    2470.
 2 b_MONTHJan                9.32   6.01    12.7   0       1      1.00     2396.
 3 b_SITUATIONnestMbox       0.679 -2.76     4.67  0.36    0.64   0.999    2183.
 4 b_SITUATIONother         -3.33  -6.88     0.508 0.958   0.0421 1.000    2472.
 5 b_SITUATIONtree           4.80   0.921    8.29  0.00667 0.993  1.00     2281.
 6 b_MONTHJan:SITUATIONnes…  1.39  -3.41     6.38  0.28    0.72   1.000    2342.
 7 b_MONTHJan:SITUATIONoth… -0.459 -5.37     4.44  0.577   0.423  1.00     2394.
 8 b_MONTHJan:SITUATIONtree -2.07  -6.91     2.56  0.808   0.192  1.00     2287.
 9 sd_BIRD__Intercept        1.06   0.00118  2.40  0       1      1.00     1842.
10 sigma                     4.08   3.30     4.80  0       1      1.00     2090.
# ℹ 53 more rows
starling.brm3$fit %>%
  tidyMCMC(
    estimate.method = "median",
    conf.int = TRUE, conf.method = "HPDinterval",
    rhat = TRUE, ess = TRUE
  )
# A tibble: 62 × 7
   term                        estimate std.error conf.low conf.high  rhat   ess
   <chr>                          <dbl>     <dbl>    <dbl>     <dbl> <dbl> <int>
 1 b_Intercept                   78.7       1.29  76.3        81.3   1.000  2466
 2 b_MONTHJan                     9.32      1.70   6.01       12.7   1.000  2437
 3 b_SITUATIONnestMbox            0.726     1.89  -2.76        4.67  0.999  2159
 4 b_SITUATIONother              -3.30      1.87  -6.88        0.508 1.000  2476
 5 b_SITUATIONtree                4.80      1.87   0.921       8.29  1.00   2293
 6 b_MONTHJan:SITUATIONnestMb…    1.40      2.49  -3.41        6.38  0.999  2340
 7 b_MONTHJan:SITUATIONother     -0.506     2.49  -5.37        4.44  0.999  2376
 8 b_MONTHJan:SITUATIONtree      -2.05      2.40  -6.91        2.56  1.00   2283
 9 sd_BIRD__Intercept             1.12      0.719  0.00118     2.40  1.00   1838
10 sigma                          4.10      0.385  3.30        4.80  1.000  2110
# ℹ 52 more rows

Conclusions:

see above

starling.brm3 %>% get_variables()
 [1] "b_Intercept"                        "b_MONTHJan"                        
 [3] "b_SITUATIONnestMbox"                "b_SITUATIONother"                  
 [5] "b_SITUATIONtree"                    "b_MONTHJan:SITUATIONnestMbox"      
 [7] "b_MONTHJan:SITUATIONother"          "b_MONTHJan:SITUATIONtree"          
 [9] "sd_BIRD__Intercept"                 "sigma"                             
[11] "Intercept"                          "r_BIRD[inside1,Intercept]"         
[13] "r_BIRD[inside10,Intercept]"         "r_BIRD[inside2,Intercept]"         
[15] "r_BIRD[inside3,Intercept]"          "r_BIRD[inside4,Intercept]"         
[17] "r_BIRD[inside5,Intercept]"          "r_BIRD[inside6,Intercept]"         
[19] "r_BIRD[inside7,Intercept]"          "r_BIRD[inside8,Intercept]"         
[21] "r_BIRD[inside9,Intercept]"          "r_BIRD[nest-box1,Intercept]"       
[23] "r_BIRD[nest-box10,Intercept]"       "r_BIRD[nest-box2,Intercept]"       
[25] "r_BIRD[nest-box3,Intercept]"        "r_BIRD[nest-box4,Intercept]"       
[27] "r_BIRD[nest-box5,Intercept]"        "r_BIRD[nest-box6,Intercept]"       
[29] "r_BIRD[nest-box7,Intercept]"        "r_BIRD[nest-box8,Intercept]"       
[31] "r_BIRD[nest-box9,Intercept]"        "r_BIRD[other1,Intercept]"          
[33] "r_BIRD[other10,Intercept]"          "r_BIRD[other2,Intercept]"          
[35] "r_BIRD[other3,Intercept]"           "r_BIRD[other4,Intercept]"          
[37] "r_BIRD[other5,Intercept]"           "r_BIRD[other6,Intercept]"          
[39] "r_BIRD[other7,Intercept]"           "r_BIRD[other8,Intercept]"          
[41] "r_BIRD[other9,Intercept]"           "r_BIRD[tree1,Intercept]"           
[43] "r_BIRD[tree10,Intercept]"           "r_BIRD[tree2,Intercept]"           
[45] "r_BIRD[tree3,Intercept]"            "r_BIRD[tree4,Intercept]"           
[47] "r_BIRD[tree5,Intercept]"            "r_BIRD[tree6,Intercept]"           
[49] "r_BIRD[tree7,Intercept]"            "r_BIRD[tree8,Intercept]"           
[51] "r_BIRD[tree9,Intercept]"            "prior_Intercept"                   
[53] "prior_b_MONTHJan"                   "prior_b_SITUATIONnestMbox"         
[55] "prior_b_SITUATIONother"             "prior_b_SITUATIONtree"             
[57] "prior_b_MONTHJan:SITUATIONnestMbox" "prior_b_MONTHJan:SITUATIONother"   
[59] "prior_b_MONTHJan:SITUATIONtree"     "prior_sigma"                       
[61] "prior_sd_BIRD"                      "lprior"                            
[63] "lp__"                               "accept_stat__"                     
[65] "stepsize__"                         "treedepth__"                       
[67] "n_leapfrog__"                       "divergent__"                       
[69] "energy__"                          
starling.draw <- starling.brm3 %>%
  gather_draws(`b.Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE)
starling.draw
# A tibble: 19,200 × 5
# Groups:   .variable [8]
   .chain .iteration .draw .variable   .value
    <int>      <int> <int> <chr>        <dbl>
 1      1          1     1 b_Intercept   80.3
 2      1          2     2 b_Intercept   77.3
 3      1          3     3 b_Intercept   78.1
 4      1          4     4 b_Intercept   79.0
 5      1          5     5 b_Intercept   78.3
 6      1          6     6 b_Intercept   77.5
 7      1          7     7 b_Intercept   80.6
 8      1          8     8 b_Intercept   76.7
 9      1          9     9 b_Intercept   78.6
10      1         10    10 b_Intercept   79.5
# ℹ 19,190 more rows

We can then summarise this

starling.draw %>% median_hdci()
# A tibble: 8 × 7
  .variable                    .value .lower .upper .width .point .interval
  <chr>                         <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 b_Intercept                  78.7   76.3   81.3     0.95 median hdci     
2 b_MONTHJan                    9.32   6.03  12.7     0.95 median hdci     
3 b_MONTHJan:SITUATIONnestMbox  1.39  -3.61   6.19    0.95 median hdci     
4 b_MONTHJan:SITUATIONother    -0.459 -5.37   4.44    0.95 median hdci     
5 b_MONTHJan:SITUATIONtree     -2.07  -6.91   2.56    0.95 median hdci     
6 b_SITUATIONnestMbox           0.679 -2.76   4.67    0.95 median hdci     
7 b_SITUATIONother             -3.33  -6.88   0.508   0.95 median hdci     
8 b_SITUATIONtree               4.80   0.921  8.29    0.95 median hdci     
starling.brm3 %>%
  gather_draws(`b_Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE) %>%
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)
Warning: `stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95), labels =
scales::percent_format()))` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
  labels = scales::percent_format()))` instead.

starling.brm3 %>%
  gather_draws(`.Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE) %>%
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  stat_halfeye(aes(x = .value, y = .variable)) +
  theme_classic()

starling.brm3$fit %>% plot(type = "intervals")
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

starling.brm4 %>%
  gather_draws(`^b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept") %>%
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  facet_wrap(~.variable, scales = "free")

starling.brm4 %>%
  gather_draws(`^b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept") %>%
  ggplot() +
  stat_halfeye(aes(x = .value, y = .variable)) +
  geom_vline(xintercept = 0, linetype = "dashed")

starling.brm4 %>%
  gather_draws(`^b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept") %>%
  ggplot() +
  geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
  geom_vline(xintercept = 0, linetype = "dashed")
Picking joint bandwidth of 0.385

## Or in colour
starling.brm4 %>%
  gather_draws(`^b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept") %>%
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = (.value),
      y = .variable,
      fill = stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous() +
  scale_fill_viridis_c(option = "C")
Picking joint bandwidth of 0.385

## Fractional scale
starling.brm4 %>%
  gather_draws(`^b_.*`, regex = TRUE) %>%
  filter(.variable != "b_Intercept") %>%
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = exp(.value),
      y = .variable,
      fill = stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous(trans = scales::log2_trans()) +
  scale_fill_viridis_c(option = "C")
Picking joint bandwidth of 0.556

This is purely a graphical depiction on the posteriors.

starling.brm3 %>% tidy_draws()
# A tibble: 2,400 × 72
   .chain .iteration .draw b_Intercept b_MONTHJan b_SITUATIONnestMbox
    <int>      <int> <int>       <dbl>      <dbl>               <dbl>
 1      1          1     1        80.3       8.46              0.154 
 2      1          2     2        77.3      10.0               1.82  
 3      1          3     3        78.1       7.90             -0.664 
 4      1          4     4        79.0       8.81             -1.15  
 5      1          5     5        78.3      10.4               0.0701
 6      1          6     6        77.5      11.4               2.85  
 7      1          7     7        80.6       6.36             -1.98  
 8      1          8     8        76.7      11.3               2.36  
 9      1          9     9        78.6      11.4               0.0574
10      1         10    10        79.5       8.51              2.37  
# ℹ 2,390 more rows
# ℹ 66 more variables: b_SITUATIONother <dbl>, b_SITUATIONtree <dbl>,
#   `b_MONTHJan:SITUATIONnestMbox` <dbl>, `b_MONTHJan:SITUATIONother` <dbl>,
#   `b_MONTHJan:SITUATIONtree` <dbl>, sd_BIRD__Intercept <dbl>, sigma <dbl>,
#   Intercept <dbl>, `r_BIRD[inside1,Intercept]` <dbl>,
#   `r_BIRD[inside10,Intercept]` <dbl>, `r_BIRD[inside2,Intercept]` <dbl>,
#   `r_BIRD[inside3,Intercept]` <dbl>, `r_BIRD[inside4,Intercept]` <dbl>, …
starling.brm3 %>% spread_draws(`.*Intercept.*|b_SITUATION.*|b_MONTH.*`, regex = TRUE)
# A tibble: 2,400 × 54
   .chain .iteration .draw b_Intercept b_MONTHJan b_SITUATIONnestMbox
    <int>      <int> <int>       <dbl>      <dbl>               <dbl>
 1      1          1     1        80.3       8.46              0.154 
 2      1          2     2        77.3      10.0               1.82  
 3      1          3     3        78.1       7.90             -0.664 
 4      1          4     4        79.0       8.81             -1.15  
 5      1          5     5        78.3      10.4               0.0701
 6      1          6     6        77.5      11.4               2.85  
 7      1          7     7        80.6       6.36             -1.98  
 8      1          8     8        76.7      11.3               2.36  
 9      1          9     9        78.6      11.4               0.0574
10      1         10    10        79.5       8.51              2.37  
# ℹ 2,390 more rows
# ℹ 48 more variables: b_SITUATIONother <dbl>, b_SITUATIONtree <dbl>,
#   `b_MONTHJan:SITUATIONnestMbox` <dbl>, `b_MONTHJan:SITUATIONother` <dbl>,
#   `b_MONTHJan:SITUATIONtree` <dbl>, sd_BIRD__Intercept <dbl>,
#   Intercept <dbl>, `r_BIRD[inside1,Intercept]` <dbl>,
#   `r_BIRD[inside10,Intercept]` <dbl>, `r_BIRD[inside2,Intercept]` <dbl>,
#   `r_BIRD[inside3,Intercept]` <dbl>, `r_BIRD[inside4,Intercept]` <dbl>, …
starling.brm3 %>%
  posterior_samples() %>%
  as_tibble()
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 2,400 × 63
   b_Intercept b_MONTHJan b_SITUATIONnestMbox b_SITUATIONother b_SITUATIONtree
         <dbl>      <dbl>               <dbl>            <dbl>           <dbl>
 1        80.3       8.46              0.154             -3.78            2.70
 2        77.3      10.0               1.82              -2.64            7.01
 3        78.1       7.90             -0.664             -4.80            7.82
 4        79.0       8.81             -1.15              -5.20            5.98
 5        78.3      10.4               0.0701            -4.92            5.86
 6        77.5      11.4               2.85              -2.20            7.30
 7        80.6       6.36             -1.98              -3.36            3.42
 8        76.7      11.3               2.36              -1.18            7.15
 9        78.6      11.4               0.0574            -2.21            3.61
10        79.5       8.51              2.37              -4.96            5.71
# ℹ 2,390 more rows
# ℹ 58 more variables: `b_MONTHJan:SITUATIONnestMbox` <dbl>,
#   `b_MONTHJan:SITUATIONother` <dbl>, `b_MONTHJan:SITUATIONtree` <dbl>,
#   sd_BIRD__Intercept <dbl>, sigma <dbl>, Intercept <dbl>,
#   `r_BIRD[inside1,Intercept]` <dbl>, `r_BIRD[inside10,Intercept]` <dbl>,
#   `r_BIRD[inside2,Intercept]` <dbl>, `r_BIRD[inside3,Intercept]` <dbl>,
#   `r_BIRD[inside4,Intercept]` <dbl>, `r_BIRD[inside5,Intercept]` <dbl>, …
starling.brm4 |>
  bayes_R2(re.form = NA, summary = FALSE) |>
  median_hdci()
          y      ymin      ymax .width .point .interval
1 0.6272011 0.5386493 0.6942811   0.95 median      hdci
starling.brm4 %>%
  bayes_R2(re.form = ~ (1 | BIRD), summary = FALSE) %>%
  median_hdci()
          y      ymin      ymax .width .point .interval
1 0.6418114 0.5471712 0.7243808   0.95 median      hdci
starling.brm4 |>
  bayes_R2(re.form = ~ (MONTH | BIRD), summary = FALSE) |>
  median_hdci()
          y      ymin      ymax .width .point .interval
1 0.6632913 0.5634415 0.7705282   0.95 median      hdci
starling.brm4 |>
  bayes_R2(re.form = ~ (MONTH + MONTH:SITUATION | BIRD), summary = FALSE) |>
  median_hdci()
          y      ymin      ymax .width .point .interval
1 0.6272011 0.5386493 0.6942811   0.95 median      hdci
starling.brm4 |> modelsummary(
  statistic = c("conf.low", "conf.high"),
  shape = term ~ statistic,
  exponentiate = FALSE
)
Warning: 
`modelsummary` uses the `performance` package to extract goodness-of-fit
statistics from models of this class. You can specify the statistics you wish
to compute by supplying a `metrics` argument to `modelsummary`, which will then
push it forward to `performance`. Acceptable values are: "all", "common",
"none", or a character vector of metrics names. For example: `modelsummary(mod,
metrics = c("RMSE", "R2")` Note that some metrics are computationally
expensive. See `?performance::performance` for details.
 This warning appears once per session.
(1)
Est. 2.5 % 97.5 %
b_Intercept 78.619 76.077 80.971
b_MONTHJan 9.121 5.805 12.577
b_SITUATIONnestMbox 0.599 -2.826 4.083
b_SITUATIONother -3.370 -6.766 -0.066
b_SITUATIONtree 4.648 1.272 7.978
b_MONTHJan × SITUATIONnestMbox 1.620 -3.266 6.338
b_MONTHJan × SITUATIONother -0.501 -5.026 4.263
b_MONTHJan × SITUATIONtree -1.821 -6.752 3.086
sd_BIRD__Intercept 0.645 0.029 2.067
sd_BIRD__MONTHJan 0.961 0.043 3.487
cor_BIRD__Intercept__MONTHJan 0.046 -0.929 0.948
sigma 3.941 3.162 4.777
Num.Obs. 80
R2 0.663
R2 Adj. 0.571
R2 Marg. 0.627
ELPD -233.1
ELPD s.e. 5.4
LOOIC 466.1
LOOIC s.e. 10.8
WAIC 464.7
RMSE 3.58
r2.adjusted.marginal 0.537722827170531
starling.brm4 |> modelplot(exponentiate = FALSE)

10 Further investigations

starling.brm3 %>%
  emmeans(~SITUATION) %>%
  pairs()
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
 contrast            estimate lower.HPD upper.HPD
 inside - (nest-box)    -1.37    -4.327     1.382
 inside - other          3.53     0.908     6.402
 inside - tree          -3.76    -6.530    -0.929
 (nest-box) - other      4.97     2.121     7.766
 (nest-box) - tree      -2.37    -5.093     0.281
 other - tree           -7.32   -10.095    -4.574

Results are averaged over the levels of: MONTH 
Point estimate displayed: median 
HPD interval probability: 0.95 
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
 contrast  estimate lower.HPD upper.HPD
 Nov - Jan    -9.05     -10.9     -7.29

Results are averaged over the levels of: SITUATION 
Point estimate displayed: median 
HPD interval probability: 0.95 
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
SITUATION = inside:
 contrast  estimate lower.HPD upper.HPD
 Nov - Jan    -9.32     -12.7     -6.01

SITUATION = nest-box:
 contrast  estimate lower.HPD upper.HPD
 Nov - Jan   -10.73     -14.1     -7.05

SITUATION = other:
 contrast  estimate lower.HPD upper.HPD
 Nov - Jan    -8.81     -12.2     -5.09

SITUATION = tree:
 contrast  estimate lower.HPD upper.HPD
 Nov - Jan    -7.28     -10.9     -3.81

Point estimate displayed: median 
HPD interval probability: 0.95 
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 4 × 8
  contrast SITUATION .value .lower .upper .width .point .interval
  <chr>    <fct>      <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 Jan/Nov  inside     11.8    7.50   16.4   0.95 median hdci     
2 Jan/Nov  nest-box   13.5    8.65   18.1   0.95 median hdci     
3 Jan/Nov  other      11.7    6.64   16.7   0.95 median hdci     
4 Jan/Nov  tree        8.73   4.17   13.0   0.95 median hdci     
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 6 × 8
# Groups:   SITUATION [1]
  SITUATION .chain .iteration .draw   Nov   Jan   Eff  PEff
  <fct>      <int>      <int> <int> <dbl> <dbl> <dbl> <dbl>
1 inside        NA         NA     1  80.3  88.8  8.46  10.5
2 inside        NA         NA     2  77.3  87.3 10.0   13.0
3 inside        NA         NA     3  78.1  86.0  7.90  10.1
4 inside        NA         NA     4  79.0  87.8  8.81  11.2
5 inside        NA         NA     5  78.3  88.7 10.4   13.3
6 inside        NA         NA     6  77.5  88.9 11.4   14.7

# A tibble: 4 × 7
  SITUATION  PEff .lower .upper .width .point .interval
  <fct>     <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
1 inside    11.8    7.50   16.4   0.95 median hdci     
2 nest-box  13.5    8.65   18.1   0.95 median hdci     
3 other     11.7    6.64   16.7   0.95 median hdci     
4 tree       8.73   4.17   13.0   0.95 median hdci     
# A tibble: 4 × 3
  SITUATION  Prob `Prob>10`
  <fct>     <dbl>     <dbl>
1 inside    1         0.801
2 nest-box  1         0.923
3 other     1         0.752
4 tree      1.000     0.278

Compare specific sets of SITUATION within each MONTH

levels(starling$SITUATION)
[1] "inside"   "nest-box" "other"    "tree"    
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
 contrast               estimate lower.HPD upper.HPD
 SITUATION.Nat vs Art       4.49     2.104      6.57
 SITUATION.Tree vs I/NB     3.10     0.649      5.38
 SITUATION.Tree vs NB       2.37    -0.281      5.09

Results are averaged over the levels of: MONTH 
Point estimate displayed: median 
HPD interval probability: 0.95 
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
NOTE: Results may be misleading due to involvement in interactions
# A tibble: 7,200 × 5
# Groups:   contrast [3]
   contrast             .chain .iteration .draw .value
   <chr>                 <int>      <int> <int>  <dbl>
 1 SITUATION.Nat vs Art     NA         NA     1   4.93
 2 SITUATION.Nat vs Art     NA         NA     2   7.22
 3 SITUATION.Nat vs Art     NA         NA     3   8.37
 4 SITUATION.Nat vs Art     NA         NA     4   5.26
 5 SITUATION.Nat vs Art     NA         NA     5   4.68
 6 SITUATION.Nat vs Art     NA         NA     6   4.77
 7 SITUATION.Nat vs Art     NA         NA     7   3.96
 8 SITUATION.Nat vs Art     NA         NA     8   5.57
 9 SITUATION.Nat vs Art     NA         NA     9   3.42
10 SITUATION.Nat vs Art     NA         NA    10   5.18
# ℹ 7,190 more rows
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
MONTH = Nov:
 contrast               estimate lower.HPD upper.HPD
 SITUATION.Nat vs Art       5.67     2.657      8.70
 SITUATION.Tree vs I/NB     4.45     1.269      7.67
 SITUATION.Tree vs NB       4.09     0.268      7.82

MONTH = Jan:
 contrast               estimate lower.HPD upper.HPD
 SITUATION.Nat vs Art       3.32     0.207      6.02
 SITUATION.Tree vs I/NB     1.66    -1.260      4.87
 SITUATION.Tree vs NB       0.61    -3.318      3.97

Point estimate displayed: median 
HPD interval probability: 0.95 
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 6 × 8
# Groups:   contrast [1]
  contrast             .chain .iteration .draw   Nov   Jan    Eff   PEff
  <fct>                 <int>      <int> <int> <dbl> <dbl>  <dbl>  <dbl>
1 SITUATION.Nat vs Art     NA         NA     1  3.91  5.95  2.05   52.4 
2 SITUATION.Nat vs Art     NA         NA     2  7.28  7.16 -0.119  -1.64
3 SITUATION.Nat vs Art     NA         NA     3  9.64  7.10 -2.54  -26.3 
4 SITUATION.Nat vs Art     NA         NA     4  8.09  2.43 -5.66  -69.9 
5 SITUATION.Nat vs Art     NA         NA     5  7.48  1.87 -5.60  -74.9 
6 SITUATION.Nat vs Art     NA         NA     6  7.08  2.46 -4.62  -65.2 

11 References